library(tidyverse)
library(fixest)
library(sf)
library(here)
library(broom)
library(fixest)
library(modelsummary)
library(gt)
library(kableExtra)
library(patchwork)
library(haven)
library(rmapshaper)
library(lubridate)
library(patchwork)
library(gridExtra)

# Load data ---------------------------------------------------------------

# All municipalities 
all_elections_bavaria <- read_rds(here("./data/all_elections_bavaria.rds"))


# Main data, only march transit Kreise are kept & year < 1954
analysis_sample <- read_rds(here("./data/analysis_sample.rds"))

# Raw march data 
arolsen_marches <- read_rds(here("./data/arolsen_marches.rds"))

# additional spatial files
dachau_floss_coords <- read_rds(here("./data/dachau_floss_coords.rds"))


# read simplified shapefile for faster plotting
bavaria_shp <- read_rds(here("./data/bavaria_shp.rds"))

bavaria_kreis_shp <- st_read(here("./data/bavaria_lkr_simplified/bavaria_lkr_simplified.shp"), 
                             quiet = T)

nazi_mdbs <- read_rds(here("./data/nsdap_bt_mps.rds"))


# Figure 3: Intensity of death marches -----

march_trend <- arolsen_marches %>% 
  group_by(date_x, unique_route) %>% 
  summarise(march_size_day_avg = mean(strength, na.rm = T), 
            victims = sum(victims, na.rm = T)) %>% 
  group_by(date_x) %>% 
  summarise(victims = sum(victims, na.rm = T), 
            size = sum(march_size_day_avg, na.rm =T), 
            num = length(unique(unique_route))) %>% 
  ungroup() %>% 
  complete(date_x = seq(min(ymd(arolsen_marches$date_x), na.rm = T), 
                        max(ymd(arolsen_marches$date_x), na.rm =T), by = "1 days"),
           fill = list(victims = 0, 
                       size = 0, 
                       num = 0)) 

march_trend_plot <- march_trend %>% 
  pivot_longer(cols = -date_x) %>% 
  ggplot(., aes(x = as.POSIXct(date_x), y = value)) + 
  facet_wrap(~name, scale = "free_y", 
             labeller = labeller(name = c("size" = "March size", 
                                          "victims" = "Number of victims", 
                                          "num" = "Number of marches"))) + 
  geom_line()  +
  theme_bw() + 
  scale_y_continuous(labels = scales::comma) +
  scale_x_datetime(date_breaks = "week", date_labels = "%b\n%d") +
  labs(y = "", x = "Date") +
  theme(panel.grid = element_blank())


ggsave(march_trend_plot, filename = here("./figures/figure_3.pdf"), 
       scale = 0.6, width = 12.5, height = 4)



# Figure 4: Right-wing voting --------------------------------


right_wing_parties_share_plot <- all_elections_bavaria %>% 
  filter(year <= 1956) %>% 
  group_by(year, election_type) %>% 
  summarise(right_wing_vote_share = sum(right_wing_votes) / sum(valid_votes)) %>% 
  ggplot(., aes(x = factor(year), fill =  election_type, y = right_wing_vote_share)) +
  geom_col(color = "black") +
  scale_fill_manual("Election type", 
                    values = c("gray90", "gray14"), 
                    labels = c( "National", "State")) +
  theme_bw() + 
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Election year", y = "Right-wing nationalist parties' vote share") +
  theme(panel.grid.major.x = element_blank(), 
        panel.grid.minor.x = element_blank(), 
        panel.grid = element_blank(),
        legend.position = "bottom")

print(right_wing_parties_share_plot)

ggsave(right_wing_parties_share_plot, filename = here("./figures/figure_4.pdf"), 
       width = 8, height = 6, scale = 0.7)



# Figure 5: Death march map ---------------------------------------------------------------------


# prepare data for mapping

map_muni_df <- left_join(
  bavaria_shp,
  analysis_sample %>%
    filter(year == 1946),
  by = c("SCH" = "code_new")
) %>% 
  
  
  mutate(treat_cat_transit = ifelse(treat_march_transit == 1, "March transit", "No transit")) %>%
  mutate(
    treat_cat_victim = case_when(
      transit_victim_cat == "No march" ~ "No march transit",
      transit_victim_cat == "March (transit only)" ~ "March transit",
      treat_march_victims == "Low victims" ~ "Low victims",
      treat_march_victims == "High victims" ~ "High victims"
    ),
    treat_cat_victim = fct_relevel(
      treat_cat_victim,
      c("No march transit", 
        "March transit",
        "Low victims", 
        "High victims")
    )
  )


# build final map plot
dm_map_victims <- ggplot() + 
  # base map
  geom_sf(data = bavaria_shp %>% rmapshaper::ms_simplify(keep = 0.01), 
          fill = "gray65", size = 0.1) +
  
  # muni w. death marches
  geom_sf(data = map_muni_df %>% select(treat_cat_victim) %>% rmapshaper::ms_simplify(keep = 0.01),
          aes(fill = treat_cat_victim ), 
          size = 0.1) +
  
  scale_fill_manual("Treatment status", values = c("white", "#ffffb2", "#fd8d3c",  "#bd0026"),
                    na.value = "gray63") +
  
  # add kreis shapes
  geom_sf(data = bavaria_kreis_shp %>% rmapshaper::ms_simplify(keep = 0.3), 
          # alpha = 0,
          fill = NA,
          size = 0.5, color = "black") +
  
  
  
  geom_sf(data = dachau_floss_coords, 
          size = 4) + 
  ggrepel::geom_label_repel(
    data = dachau_floss_coords,
    aes(label = MAIN, geometry = geometry),
    stat = "sf_coordinates",
    min.segment.length = 0, 
    nudge_x = c(-45000, 45000),
    nudge_y = 25000
  )  +
  theme_void() + 
  theme(panel.grid = element_blank(), 
        legend.position = "bottom")


ggsave(dm_map_victims, filename = "./figures/figure_5_map.pdf", 
       width = 8, height = 9, scale = 0.8)



# Figure 6: Victim distribution --------------------------------------

treat_cat_dist <- analysis_sample %>% 
  ggplot(., aes(x = treat_march_victims)) +
  geom_bar() + 
  labs(x = "March violence (categorical)", y = "Number of observations") + 
  theme_bw() +
  theme(panel.grid = element_blank()) 

treat_victims_cont_dist <- analysis_sample %>% 
  
  filter(year == 1946) %>%
  mutate(cut_victims = cut(sum_bodies_am, breaks = c(0, 1, 5, 10, 25, 50, 75, 100, 200, 500, 1000), right = F,
                           labels = c("0", "1-4", "5-9", "10-24", "25-49", "50-74", "75-99", "100-199", "200-499", "500+")
  )) %>% 
  count(cut_victims) 

cat_list <- as.character(treat_victims_cont_dist$cut_victims)

cat_label <- tibble(
  x = c(1, 3.5, 8), 
  y = c(6000), 
  label = c("No victims", "Low victims", "High victims")
)


treat_victims_cont_dist_plot <- treat_victims_cont_dist %>% 
  ggplot(., aes(x = cut_victims ,  y = n)) +
  geom_rect(aes(ymin = -Inf, ymax = +Inf, 
                xmin = which(cat_list == "0") - .5, 
                xmax = which(cat_list == "0") + 0.5), 
            fill = "grey90", alpha = 0.25) +
  geom_rect(aes(ymin = -Inf, ymax = +Inf, 
                xmin = which(cat_list == "1-4") - 0.5, 
                xmax = which(cat_list == "25-49") + 0.5), 
            fill = "grey70", alpha = 0.25) +
  geom_rect(aes(ymin = -Inf, ymax = +Inf, 
                xmin = which(cat_list == "50-74") - 0.5, 
                xmax = which(cat_list == "500+") + 0.5), 
            fill = "grey50", alpha = 0.25) +
  geom_col(size = .25, color = "black") +
  labs(x = "\nNumber of march victims in municipality", y = "Number of observations (log scale)") +
  geom_text(data = cat_label, inherit.aes = F, aes(x = x, y = y, label = label)) + 
  scale_x_discrete() + 
  theme_bw() +
  theme(panel.grid = element_blank()) +
  scale_y_continuous(trans=scales::pseudo_log_trans(base = 10),
                     breaks = c(0, 10, 100, 1000),
                     limits = c(0, 8000)) +
  geom_vline(xintercept = .5, size = .25) +
  geom_vline(xintercept = 5.5, size = .25) +
  geom_vline(xintercept = 1.5, size = .25)


ggsave(treat_victims_cont_dist_plot, 
       scale = 0.8, filename = "./figures/figure_6.pdf", 
       width = 11, height = 4)




# Figure 7: Covariate balance ----------------------------------------------------


## Define covariate vector -----------------------------------------------


covariates <- c(
  
  # Gemeindeverzeichnis / socio-econ variables
  "log(population_1939)",
  "tax_income_pc_1938",
  
  "num_land_betriebe_1939",
  "occ_pop_industry_share_1939",
  "occ_pop_public_servants_1939",
  "migration_1933_1939",
  
  # repression infrastructure & WW2 violence
  "conc_camp_sub_count",
  "distance_nearest_subcamp",
  "distance_nearest_maincamp",
  "ww2_bombs_tons",
  "deportations",
  
  
  # infrastructure
  "elevation",
  "ruggedness",
  "train_main",
  "train_minor",
  "forest_cover",
  
  # political variables
  "protestant_share_1939",
  "nsdap_share_aw",
  "socialist_share_aw"
  
  
)


## Balance plot: atrocities --------------------------------------------------



cov_balance_data <- analysis_sample %>% 
  filter(year == 1946) %>% 
  mutate(
    conc_camp_sub_count = as.numeric(scale(conc_camp_sub_count)),
    distance_nearest_subcamp = as.numeric(scale(distance_nearest_subcamp)),
    distance_nearest_maincamp = as.numeric(scale(distance_nearest_maincamp)),
    ww2_bombs_tons = as.numeric(scale(ww2_bombs_tons)),
    population_1939 = as.numeric(scale(log(population_1939 + 1))),
    protestant_share_1939 = as.numeric(scale(protestant_share_1939)),
    num_land_betriebe_1939 = as.numeric(scale(num_land_betriebe_1939)),
    tax_income_pc_1938 = as.numeric(scale(tax_income_pc_1938)),
    elevation = as.numeric(scale(elevation)),
    ruggedness = as.numeric(scale(ruggedness)),
    forest_cover = as.numeric(scale(forest_cover)),
    occ_pop_agriculture_share_1939 = as.numeric(scale(occ_pop_agriculture_share_1939 )),
    occ_pop_industry_share_1939 = as.numeric(scale(occ_pop_industry_share_1939)),
    occ_pop_public_servants_1939 = as.numeric(scale(occ_pop_public_servants_1939)),
    migration_1933_1939 = as.numeric(scale(migration_1933_1939)),
    deportations = as.numeric(scale(deportations)),
    train_main,
    nsdap_share_aw = as.numeric(scale(nsdap_share_aw)),
    socialist_share_aw = as.numeric(scale(socialist_share_aw)),
    train_minor) 



cov_balance_full <- feols(.[covariates] ~ 
                            treat_march_victims
                          
                          
                          | kreis_code 
                          , 
                          data = cov_balance_data ,
                          # family = "binomial",
                          se = "hetero") %>% 
  map_df(., tidy, conf.int = T, .id = "depvar") %>%
  mutate(sample = "Full sample, no death march FE")



cov_balance_within <- feols(.[covariates] ~ 
                              treat_march_victims
                            
                            
                            | kreis_code  + march_fixed_effect 
                            ,
                            data = cov_balance_data %>% 
                              filter(transit_victim_cat != "No march"),
                            # family = "binomial",
                            se = "hetero") %>% 
  map_df(., tidy, conf.int = T, .id = "depvar") %>%
  mutate(sample = "March sample only & death march FE")

cov_balance_output <- bind_rows(cov_balance_within) %>% 
  filter(grepl("High", term)) %>% 
  mutate(depvar = gsub("lhs\\: ","",  depvar)) %>% 
  mutate(depvar = fct_reorder(depvar, estimate))

cov_labels <-  c(
  
  # Gemeindeverzeichnis / socio-econ variables
  "tax_income_pc_1938" = "Tax income (1938)",
  "log(population_1939)" = "Population size (1939) log",
  "num_land_betriebe_1939" = "Number of agrarian\nbusinesses (1939)",
  "occ_pop_industry_share_1939" = "Population share working\nin industry (1939)", 
  "occ_pop_public_servants_1939" = "Population share working\nin the public service (1939)", 
  "migration_1933_1939" = "Net migration 1933-1939", 

  # repression infrastructure & WW2 violence
  "conc_camp_sub_count" = "Number of concentration\n subcamps in municipality",
  "distance_nearest_subcamp" = "Distance to nearest subcamp", 
  "distance_nearest_maincamp" = "Distance to Dachau/Flossenbuerg",
  "ww2_bombs_tons" = "Allied bombs (tons)",
  "deportations" = "Deportations (Jews)",
  
  
  # infrastructure
  "elevation" = "Elevation",
  "ruggedness" = "Terrain ruggedness",
  "train_main" = "Main train line crossing",
  "train_minor" = "Minor train line crossing", 
  "forest_cover" = "Percent covered in forest (1939", 
  
  # political variables
  "protestant_share_1939" = "Share of protestants (1939", 
  "nsdap_share_aw" = "NSDAP vote share in 1933",
  "socialist_share_aw" = "Socialist block vote share in 1933"
  
  
)

balance_plot <-ggplot(cov_balance_output, 
                      aes(x = depvar, 
                          y = estimate)) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  
  geom_point(position = position_dodge(width = .5)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, 
                position = position_dodge(width = .5)) + 
  coord_flip() +
  labs(
    y = "Coefficient estimate of\n'High victims' dummy", 
    x = "", 
    title = "March atrocities") +
  theme_bw() +
  theme(legend.position = "bottom", 
        legend.direction = "vertical",
        panel.grid = element_blank()

  ) +
  scale_x_discrete(labels = cov_labels)


## Balance plot: march transits ----------------------------------------------


cov_balance_transit <- feols(.[covariates] ~ 
                               treat_march_transit
                             
                             
                             | kreis_code 
                             , 
                             data = cov_balance_data ,
                             # family = "binomial",
                             se = "hetero") %>% 
  map_df(., tidy, conf.int = T, .id = "depvar") 

cov_balance_transit <- cov_balance_transit %>% 
  mutate(depvar = gsub("lhs\\: ", "", depvar), 
         depvar = fct_reorder(depvar, estimate))



balance_plot_transit <-ggplot(cov_balance_transit, 
                              aes(x = depvar, 
                                  y = estimate)) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  
  geom_point(position = position_dodge(width = .5)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, 
                position = position_dodge(width = .5)) + 
  coord_flip() +
  labs(
    y = "Coefficient estimate of\n'March transit' dummy", 
    x = "", 
    title = "March transits") +
  theme_bw() +
  theme(legend.position = "bottom", 
        legend.direction = "vertical",
        panel.grid = element_blank()

  ) +
  scale_x_discrete(labels = cov_labels)



ggsave(balance_plot_transit + balance_plot, filename = "./figures/figure_7.pdf", 
       width = 17, height = 12, scale = 0.55)

# Table 1 -----------------------------------------------------------


## Run main models ---------------------------------------------------------


models_categorical <- feols(
  c(right_wing_share) ~
    
    sw( 
      
      
      treat_march_victims, 
      treat_march_victims + .[covariates] ,
      treat_march_victims + .[covariates] + march_fixed_effect
      
    )
  
  | kreis_code + year , 
  cluster = c("code_new", "kreis_code"),
  data = analysis_sample )



models_bivariate <- feols(
  c(right_wing_share) ~

    sw(treat_march_victims, 
       treat_march_victims + .[covariates]), 
  cluster = c("code_new", "kreis_code"),
  data = analysis_sample )


models_pop_size <- feols(
  c(right_wing_share) ~
    
    sw( 
      
      treat_march_victims_pop + .[covariates],
      treat_viol_base + .[covariates]
      
    )
  
  | kreis_code + year + march_fixed_effect , 
  cluster = c("code_new", "kreis_code"),
  data = analysis_sample %>% 
    ungroup() %>% 
    mutate(treat_viol_base = case_when(treat_march_victims == "High victims" ~ "1", 
                                       treat_march_victims == "Low victims" ~ "0", 
                                       TRUE ~ NA_character_)) %>% 
    mutate(treat_viol_base = as.numeric(treat_viol_base))
)



## Build main table output -------------------------------------------------



coef_map <- c(
  
  
  "treat_march_victimsLow victims" = "Low Victims (1-49)",
  "treat_march_victimsHigh victims" = "High Victims (50+)",
  "treat_march_victims_pop" = "Number of victims/Population 1939",
  "treat_viol_base" = "High vs. low victims", 
  
  "expellee_share_aw" = "Expellees/Population 1939", 
  "treat_march_transit" = "March transit", 
  "treat_march_size_pop" = "March size/Population 1939",
  
  "treat_march_victimsLow victims:I(year > 1946)TRUE" = "Low Victims X Post-1946",
  "treat_march_victimsHigh victims:I(year > 1946)TRUE" = "High Victims X Post-1946")

my_gof_map <- modelsummary::gof_map %>%
  filter(grepl("adj\\.r|nobs", raw))

rows <- tribble( ~term,       ~model1, ~model2, ~model3, ~model4, ~model5, ~model6,
                 "Kreis FE", "", "Yes", "Yes", "Yes","Yes","Yes",
                 "Year FE",  "", "Yes", "Yes", "Yes","Yes","Yes",
                 "Covariates", "",   " ",   "Yes",      "Yes"  , "Yes"  , "Yes"  , 
                 "March FE", "",  " ",     " ",       "Yes","Yes","Yes"
)

attr(rows, 'position') <- c(9:14)

mod_paper <- c(models_bivariate[1], 
               models_categorical, 
               models_pop_size)

names(mod_paper) <- 1:6



## Save table --------------------------------------------------------------


modelsummary(as.list(mod_paper), 
             coef_map = coef_map,
             gof_map = my_gof_map,
             add_rows = rows,
             # output = "latex",
             stars = T) %>% 
  
  kableExtra::add_header_above(c(" " = 1, "DV: Nationalist Parties Vote Share" = 6) ) %>% 
  kableExtra::save_kable(file = "./tables/table_1.html")



# Figure 8: Alternative treatment cutoffs ---------------------------------


mod_treat_class <-   feols(right_wing_share ~
                             
                             sw(treatment_quart, 
                                treatment_quint, 
                                treatment_sext, 
                                treatment_sept, 
                                treatment_oct, 
                                treatment_dec
                             )  + 
                             .[covariates]
                           
                           | kreis_code + year , 
                           cluster = c("kreis_code", 
                                       "code_new"),
                           # split = ~ year,
                           data = analysis_sample 
)

treat_class_rob_df <- map_df(mod_treat_class, 
                             broom::tidy, 
                             conf.int = T) %>% 
  filter(grepl("treatment", term)) %>% 
  mutate(treat_status = rep(c("Low victims category", "High victims category"), 6), 
         class_cat = c("1_quartile", "1_quartile", 
                       "2_quintile", "2_quintile", 
                       "3_sextile", "3_sextile", 
                       "4_septile", "4_septile", 
                       "5_octile", "5_octile", 
                       "6_decile", "6_decile"))

coefplot_alt_cutoff_choice <- ggplot(treat_class_rob_df, 
                                     aes(x = class_cat, y = estimate, color = treat_status)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_point(size = 2, position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = estimate - 1.96 * std.error, 
                    ymax = estimate + 1.96 * std.error), 
                position = position_dodge(width = 0.5),
                width = 0) +
  theme_bw()  +
  scale_color_manual("Treatment group",
                     values = c( "gray14", "gray60"), 
                     labels = c("High victims", "Low victims")) +
  scale_x_discrete(labels = c("...top quartile", "...top quintile", 
                              "...top sextile", "...top septile", 
                              "...top octile", "...top decile")) +
  labs(x = "\"High\" victim category defined by...", 
       y = "Coefficient estimate") + 
  # caption = "Reference category is \"No victim\". \"Low\" percentile are the remaining percentiles") +
  theme(panel.grid = element_blank()) +
  theme(legend.position = "bottom")


ggsave(coefplot_alt_cutoff_choice, filename = here("./figures/figure_8.pdf"), 
       width = 8, height = 6, scale = .8)

 

# Figure 9: Effect over time ------------------------------------------------------


## Figure 9A ---------------------------------------------------------------



analysis_sample_long <- all_elections_bavaria %>%
  ungroup() %>%
  group_by(year) %>%
  filter(kreis_has_victim_transit_am == 1) %>%
  mutate(treat_march_victims = case_when(sum_bodies_am == 0 ~ "No victims",
                                         sum_bodies_am > 0 & sum_bodies_am < 50 ~ "Low victims",
                                         sum_bodies_am >= 50 ~ "High victims")) %>%
  mutate( treat_march_victims = fct_relevel(treat_march_victims, c("No victims", "Low victims", "High victims")))



models_over_time <- feols(right_wing_share ~
                            
                            treat_march_victims + 
                            
                            .[covariates]
                          
                          
                          | kreis_code + year + march_fixed_effect , 
                          cluster = c("code_new", "kreis_code"),
                          fsplit = ~year,
                          data = analysis_sample_long)


models_over_time_df <- models_over_time %>% 
  map_dfr(broom::tidy, conf.int = T, .id = "id") %>% 
  filter(grepl("treat_march", term)) %>% 
  mutate(id = gsub("sample.var\\: year; sample\\: ", "", id)) %>% 
  mutate(id = gsub("Full sample", "Full\nsample", id))


over_time_coefplot <- models_over_time_df %>% 
  ggplot(., aes(x = id, y = estimate, color = term)) + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_point(size = 2, position = position_dodge(width = 0.5)) + 
  geom_errorbar(aes(ymin = estimate - 1.96 * std.error, 
                    ymax = estimate + 1.96 * std.error), 
                position = position_dodge(width = 0.5),
                width = 0) +
  theme_bw() + 
  scale_color_manual("Treatment group",
                     values = c( "gray14", "gray60"), 
                     labels = c("High victims", "Low victims")) +
  geom_vline(xintercept = 14.5) + 
  theme(panel.grid = element_blank()) +
  theme(legend.position = "bottom") +
  labs(y = "Treatment estimate", x = "Election year")




## Figure 9B ---------------------------------------------------------------

all_nazi_trials <- read_rds(here("./data/nazi_trials.rds"))


nazi_trials_plot <- all_nazi_trials %>% 
  group_by(court_date_year) %>% 
  count() %>% 
  filter(court_date_year <= max(all_elections_bavaria$year)) %>%
  ggplot(., aes(x = court_date_year, y = n)) + 
  geom_line() +
  scale_x_continuous(breaks = seq(1940, 2020, 5)) +
  theme_bw() + 
  # geom_vline(xintercept = as.Date("1949-08-14"))+ 
  theme(panel.grid = element_blank()) + 
  labs(x = "Year", y = "Number of Nazi Trials")



over_time_combined <- over_time_coefplot / nazi_trials_plot + plot_annotation(tag_levels = "A")

ggsave(over_time_combined, filename = here("./figures/figure_8.pdf"), 
       width = 11, height = 9, scale = .7)


# Supplementary Information ----------------------------------------------------------------


## Figure SI-1.1: Former Nazi party members in post-WWII parties -----------


nazi_mdbs_plot <- nazi_mdbs %>% 
  mutate(convinced_nazi = ifelse(nsdap_entry_date <= ymd("1933-03-01"), 1, 0)) %>% 
  ungroup() %>% 
  mutate(right_wing_parties = case_when(party == "DP" ~ "nationalist", 
                                        party == "WAV" ~ "nationalist", 
                                        party == "GB/BHE" ~ "nationalist", 
                                        TRUE ~ "other")) %>% 
  group_by(right_wing_parties) %>% 
  summarise(convinced_nazi_share = sum(convinced_nazi, na.rm = T) / n()) %>% 
  ungroup()  %>% 
  ggplot(., aes(x = right_wing_parties, y = convinced_nazi_share, fill = right_wing_parties)) + 
  geom_col(color = "black", width = 0.5, size = .5) +
  scale_x_discrete(labels = c("Nationalist\nright-wing parties", "Other parties")) + 
  scale_fill_manual("Counted as nationalist,\nright wing parties", 
                    
                    values = c( "brown", "darkgrey")) +
  labs(#title = "Former NSDAP members who joined before March 1933", 
    x = "", y = "Share of MPs with former NSDAP membership ") +
  theme_bw() +
  theme(legend.position = "none", 
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12)) +
  scale_y_continuous(labels = scales::percent)



ggsave(nazi_mdbs_plot, filename = "./figures/figure_SI-1.1.pdf",
       width = 8, height = 6, scale = 0.7)



## Table SI-3.1: Summary statistics ------------------------------------------------------



cov_labels <-  c(
  
  # Gemeindeverzeichnis / socio-econ variables
  "tax_income_pc_1938" = "Tax income (1938)",
  "log(population_1939)" = "Population size (1939) log",
  "num_land_betriebe_1939" = "Number of agrarian\nbusinesses (1939)",
  "occ_pop_industry_share_1939" = "Population share working\nin industry (1939)", 
  "occ_pop_public_servants_1939" = "Population share working\nin the public service (1939)", 
  "migration_1933_1939" = "Net migration 1933-1939", 

  # repression infrastructure & WW2 violence
  "conc_camp_sub_count" = "Number of concentration\n subcamps in municipality",
  "distance_nearest_subcamp" = "Distance to nearest subcamp", 
  "distance_nearest_maincamp" = "Distance to nearest main camp\n(Dachau/Flossenbuerg)",
  "ww2_bombs_tons" = "Number of bombs\ndropped on municipality in WW2",
  "deportations" = "Number of deported Jews\nfrom municipality",
  
  
  # infrastructure
  "elevation" = "Elevation",
  "ruggedness" = "Terrain ruggedness",
  "train_main" = "Main train line crossing",
  "train_minor" = "Minor train line crossing", 
  "forest_cover" = "Percent covered in forest (1939)", 
  
  # political variables
  "protestant_share_1939" = "Share of protestants (1939", 
  "nsdap_share_aw" = "NSDAP vote share in 1933",
  "socialist_share_aw" = "Socialist block vote share in 1933"
  
  
)

newnames <- cov_labels[-2]
oldnames <- names(cov_labels[-2])

vars_for_desc_stats <- analysis_sample %>% 
  ungroup() %>% 
  select(all_of(covariates[-1]), "population_1939", "right_wing_share", "treat_march_victims", "treat_march_victims_pop") %>% 
  mutate(treat_march_low = ifelse(treat_march_victims == "Low victims", 1, 0), 
         treat_march_high = ifelse(treat_march_victims == "High victims", 1, 0)) %>% 
  # filter(pct_dead > 0.1) %>% 
  rename("Right-wing block vote share" = "right_wing_share", 
         "March victims" = "treat_march_victims",
         "March victims (Low)" = "treat_march_low",
         "March victims (High)" = "treat_march_high", 
         "Population (1939)" = "population_1939",
         "March victims/Population" = "treat_march_victims_pop") %>% 
  rename_at(all_of(oldnames), ~newnames) %>% 
  as.data.frame()

# calculate table to get list of histograms
desc_stats <- datasummary(All(vars_for_desc_stats) ~ 
                            N + Mean + SD +  Min + Max, 
                          data = vars_for_desc_stats, 
                          output = "data.frame")

desc_stats <- desc_stats %>% 
  mutate(Type = ifelse(as.integer(Max) == 1, "Binary", "Continuous")) %>% 
  relocate(Type, .before = N) %>% 
  rename(Obs = N)

# print(xtable::xtable(desc_stats), 
#       file = here("./tables/main_analysis_sumstats.tex"), 
#       floating = F,
#       booktabs = T,
#       include.rownames = F,
#       tabular.environment = "tabular")

desc_stats %>% 
  gt::gt() %>% 
  gt::gtsave(filename = here("./tables/table_SI-3.1.html"))


## Figure SI-4.1: Leave-one-county-out test -------------------------------



models_out <- list()
counter <- 1

for(kreis in unique(analysis_sample$kreis_name)) {
  temp_mod <- feols(right_wing_share ~
                      
                      treat_march_victims + .[covariates]
                    
                    | kreis_code + year + march_fixed_effect, 
                    cluster = c("code_new", "kreis_code"),
                    data = analysis_sample %>% 
                      filter(kreis != kreis_name))
  
  models_out[[counter]] <- broom::tidy(temp_mod, .id = "treatment", conf.int = T) %>% 
    mutate(kreis = kreis) %>% 
    filter(grepl("treat", term))
  
  counter <- counter + 1
}

models_out <- map_dfr(models_out, as_tibble)

models_out <- models_out %>% 
  # remove low treatment category
  filter(!grepl("Low", term)) %>% 
  group_by(kreis) %>% 
  mutate(estimate_mean = mean(estimate)) %>% 
  ungroup() %>% 
  mutate(kreis = fct_reorder(kreis, estimate_mean))

loko_plot <- ggplot(models_out, 
                    aes(x = kreis, y = estimate, group = term)) + 
  geom_point( size = 2) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), 
                width = 0) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  coord_flip() +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  labs(x = "Landkreis left out", y = "Coefficient estimate of \'High victims\' category")


ggsave(loko_plot, filename = here("./figures/figure_SI-4.1.pdf"), 
       width = 8, height = 13, scale = .7)



## Figure SI-4.2: Leave-one-party-out  -----------------------------------------------------

right_wing_parties <- c("wav_votes",
                        "d_bhe_votes",
                        "gesd_bhe_votes",
                        "bhv_votes",
                        "dp_votes",
                        "db_votes",
                        "drp_votes",
                        "vu_votes",
                        "dns_votes",
                        "dg_votes",
                        "gdp_votes",
                        "npd_votes")




models_out <- list()
counter <- 1

for(party in right_wing_parties) {
  
  analysis_sample <- analysis_sample %>% 
    rowwise() %>% 
    mutate(right_wing_votes_lopo = sum(c_across(any_of(right_wing_parties[right_wing_parties != party])))) %>%  
    ungroup() %>% 
    mutate(right_wing_share_lopo = right_wing_votes_lopo / valid_votes)
  
  print(paste0("Party left out: ", party))
  
  temp_mod <- feols(right_wing_share_lopo ~
                      
                      treat_march_victims + .[covariates]
                    
                    | kreis_code + year + march_fixed_effect, 
                    cluster = c("code_new", "kreis_code"),
                    data = analysis_sample )
  
  models_out[[counter]] <- broom::tidy(temp_mod, .id = "treatment", conf.int = T) %>% 
    mutate(party = party) %>% 
    filter(grepl("treat", term))
  
  counter <- counter + 1
  
}

models_out <- map_dfr(models_out, as_tibble)

models_out <- models_out %>% 
  # remove low treatment category
  filter(!grepl("Low", term)) %>% 
  mutate(party = fct_reorder(party, estimate))

lopo_plot <- ggplot(models_out, 
                    aes(x = party, y = estimate)) + 
  geom_point( size = 2) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), 
                width = 0) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_discrete(labels = c(csu_votes = "Christlich-Soziale Union in Bayern e.V. (CSU)",
                              spd_votes = "Sozialdemokratische Partei Deutschlands (SPD)",
                              fdp_votes = "Freie Demokratische Partei (FDP)",
                              kpd_votes = "Kommunistische Partei Deutschlands (verb.1956)",
                              wav_votes = "Wirtschaftliche Aufbau-Vereinigung (WAV)",
                              bp_votes = "Bayernpartei (BP)",
                              d_bhe_votes = "Deutscher Gemeinschaftsblock d.Heimatvertr.u.Entr.",
                              gesd_bhe_votes = "Gesamtdeutscher Block/Block d.Heimatvertr.u.Entr.",
                              bhv_votes = "Block der Heimatvertriebenen",
                              db_votes  = "Der Deutsche Block",
                              kp_votes = "Königspartei",
                              bnotg_votes = "Unpolit.Bäuerl.Notgemeinsch. - Hilfe f.d.Bay.Wal",
                              vwpe_votes = "Vereinigung wirtschaftl.u.politisch Entrechteter",
                              dp_eff_votes = "Bund d.Deutschen Partei f.Einh., Fried.u.Freiheit",
                              khe_block_votes = "Wahlblock d.Kriegsgesch.-Heimatvertrieb.-Entrecht",
                              vu_votes = "Vaterländische Union (VU)",
                              brbl_votes = "Bayerischer Rechtsblock (BRbl)",
                              dp_votes = "Deutsche Partei (DP)",
                              drp_votes = "Deutsche Reichs-Partei (DRP)",
                              dns_votes = "Nationale Sammlung (DNS)",
                              dmu_votes = "Deutscher Mittelstand (Union Deu.Mittelst.part.)",
                              gvp_votes = "Gesamtdeutsche Volkspartei (GVP)",
                              dg_votes = "Deutsche Gemeinschaft (DG)",
                              fu_votes = "Föderalistische Union (Bayernpartei/Zentrum) (FU)",
                              gehr_votes = "Gehr (Einzelbewerber Gehr",
                              gdp_votes = "Gesamtdeutsche Partei (DP-BHE) (GDP)",
                              npd_votes = "Nationaldemokratische Partei Deutschlands (NPD)")) + 
  coord_flip() +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  labs(x = "Party left out", y = "Coefficient estimate of\n\'High victims\' category")


ggsave(lopo_plot, filename = here("./figures/figure_SI-4.2.pdf"), 
       width = 8, height = 9, scale = .7)



## Figure SI-4.3: Additional balance tests------------------------------------------



cov_balance_within_deciles <- feols(.[covariates] ~ 
                                      sw(
                                        treatment_quart,
                                        treatment_quint,
                                        treatment_sext,
                                        treatment_sept,
                                        treatment_oct,
                                        treatment_dec
                                      )
                                    
                                    
                                    | kreis_code  + march_fixed_effect 
                                    ,
                                    data = cov_balance_data %>% 
                                      filter(transit_victim_cat != "No march"),
                                    # family = "binomial",
                                    se = "hetero") %>% 
  map_df(., tidy, conf.int = T, .id = "id") %>%
  mutate(sample = "March sample only & death march FE")

cov_balance_output <- cov_balance_within_deciles %>% 
  filter(grepl("High", term)) %>% 
  mutate(id = gsub("lhs\\: ","",  id)) %>% 
  separate(id, sep = "; rhs: ", into = c("cov", "treat_cutoff")) %>% 
  mutate(cov = fct_reorder(cov, estimate))


# compute coefficient plot for different treatment cutoffs
balance_plot_deciles <-ggplot(cov_balance_output, 
                              aes(x = cov, 
                                  y = estimate, 
                                  group = treat_cutoff, 
                                  color = treat_cutoff)) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  
  geom_point(position = position_dodge(width = .75)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0, 
                position = position_dodge(width = .75)) + 
  coord_flip() +
  labs(
    y = "Coefficient estimate of\n'High victims' dummy", 
    x = "Covariate (standardized)", 
    title = "") +
  theme_bw() +
  scale_color_brewer("High victim category\ndefined by...", 
                     palette = "Greys", direction = -1, 
                     labels = c("...top decile", 
                                "...top octile", 
                                "...top septile", 
                                "...top sextile", 
                                "...top quintile", 
                                "...top quartile")) +
  theme(legend.direction = "vertical",
        panel.grid = element_blank()  ) +
  scale_x_discrete(labels = cov_labels)



ggsave(balance_plot_deciles, filename = "./figures/figure_SI-4.3.pdf", 
       width = 14, height = 14, scale = 0.55)




## Figure SI-4.4: Equivalence test -----------------------------------------


### Functions ---------------------------------------------------------------


# These are taken from Hartman & Hidalgo (2018): https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/RYNSDG

equiv.t.test <- function(x, y, alpha = .05, epsilon = .2, std.err = "nominal", cluster.x = NULL, cluster.y = NULL) {
  x = x[!is.na(x)]
  y = y[!is.na(y)]
  
  
  dbar <- mean(x) - mean(y)
  m <- as.double(length(x))
  n <- as.double(length(y))
  N <- m+n
  x.var <-  var(x)
  y.var <- var(y)
  non.cent <- (m*n*epsilon^2)/N
  critical.const <- sqrt(qf(alpha,1,N-2,non.cent))
  
  se = sqrt((m-1)*x.var + (n-1)*y.var) / sqrt(m*n * (N-2)/N)
  df = N - 2
  
  t.stat <-  dbar / se
  p = pf(abs(t.stat)^2, 1, df , non.cent)
  obs_smd = (mean(x) - mean(y)) / sd(y)
  inverted <- try(uniroot(function(x) pf(abs(t.stat)^2, 1, N-2, ncp = (m*n*x^2)/N) - ifelse(pf(abs(t.stat)^2, 1, N-2, ncp = (m*n*0^2)/N) < alpha, pf(abs(t.stat)^2, 1, N-2, ncp = (m*n*obs_smd^2)/N), alpha), c(0,10*abs(t.stat)), tol = 0.0001)$root, silent = TRUE)
  if(class(inverted) == "try-error") {
    inverted = NA
  }
  rej = abs(t.stat) <= critical.const
  return(list(t.stat = t.stat, critical.const = critical.const, power = 2*pt(critical.const, N-2)-1, rej = rej, p = p, inverted = inverted))
}


run_equiv = function(X, Tr, epsilon.method = "std.effect", Y = NULL, 
                     custom.epsilon = NULL, std.err = "nominal", type = "equiv.t.test", 
                     fdr_correct = FALSE, cluster.x = NULL, cluster.y = NULL) {
  switch(epsilon.method,
         std.effect = {
           tol = abs(mean(Y[Tr == 1], na.rm = TRUE) - mean(Y[Tr == 0], na.rm = TRUE)) / sd(Y[Tr == 0], na.rm = TRUE)
         }, 
         custom = {
           if(is.null(custom.epsilon)) stop("ERROR: Must enter a custom epsilon value to use the 'custom' epsilon.method.")
           tol = custom.epsilon
         },
         strict = {
           tol = 0.36
         }, 
         liberal = {
           tol = 0.74
         }, 
         stop("ERROR: 'epsilon.method' not set to a valid option")
  )
  
  ranges = rep(tol, ncol(X))
  names(ranges) = names(X)
  print(ranges)
  
  # conduct equiv test for each variable
  tests = do.call("rbind", lapply(names(X), function(var) {
    print(var)
    
    res = equiv.t.test(X[Tr == 1, var], X[Tr == 0, var], epsilon = ranges[var], std.err = std.err, cluster.x = cluster.x, cluster.y = cluster.y)
    res$stat = res$t.stat
    res$obs_diff = (mean(X[Tr == 1, var], na.rm = TRUE) - mean(X[Tr == 0, var], na.rm = TRUE))
    res$obs_smd = ((mean(X[Tr == 1, var], na.rm = TRUE) - mean(X[Tr == 0, var], na.rm = TRUE))) / sd(X[Tr == 0, var], na.rm = TRUE)
    res$sd = sd(X[Tr == 0, var], na.rm = TRUE)
    
    return(c(res$inverted
             , round(res$p, 4)
             , res$obs_smd
             , res$obs_diff
             , res$power
             , res$sd))
    
  }))
  
  p.vals = unlist(tests[,2])
  names(p.vals) = names(ranges)
  
  power = unlist(tests[,5])
  names(power) = names(ranges)
  
  inverted = unlist(tests[,1])
  names(inverted) = names(ranges)
  
  sd = unlist(tests[,6])
  names(sd) = names(ranges)
  
  # return to scale of var
  inverted.scaled = unlist(lapply(names(inverted), function(var) {
    return(inverted[var] * sd[var])
  }))
  names(inverted.scaled) = names(ranges)
  
  observed.smd = unlist(tests[,3])
  names(observed.smd) = names(ranges)
  
  observed.diff = unlist(tests[,4])
  names(observed.diff) = names(ranges)
  
  # conduct BH FDR adjustment
  if(fdr_correct) {
    p.vals = p.adjust(p.vals, method = "BH")  
  }
  
  return(list(tol = ranges, inverted = inverted, inverted.scaled = inverted.scaled, p.vals = p.vals, observed.diff = observed.diff, observed.smd = observed.smd, power = power))
}

## a helper function to generate the plot
generate_plot = function(equiv_tests, panel.widths=c(1, 1, 5, 1, 1), display.names = NULL, var.rounding = 1, pval.rounding = 2, fdr_correct = FALSE) {
  .e <- environment()
  if(!is.null(display.names)) {
    equiv_tests$names = factor(display.names, levels = rev(display.names))
  } else {
    equiv_tests$names = factor(row.names(equiv_tests), levels = rev(row.names(equiv_tests)))
  }
  equiv_tests$const = rep(1, nrow(equiv_tests))
  equiv_tests = equiv_tests[nrow(equiv_tests):1, ]
  
  g = ggplot(equiv_tests, aes(x = names) )
  print(length(unique(equiv_tests$tol)))
  g = g + geom_linerange(aes(ymin = -1 * inverted, ymax = inverted), size = 5, color = "darkgray", alpha = 0.9)
  if(length(unique(equiv_tests$tol)) > 1){
    g = g + geom_linerange(aes(ymin = -1 * tol, ymax = tol), size = 10, color = 'gray')   
  } else {
    print("here")
    print(unique(equiv_tests$tol))
    lines = unique(equiv_tests$tol)
    g = g + geom_hline(yintercept = c(-1 * lines, lines), linetype = 2, size = 0.75)
  }
  g = g  + scale_shape_identity() + geom_point(aes(y = observed.smd), color = "black", shape = 18, size = 4)
  g = (g + coord_flip()
       + theme(
         axis.text.y = element_blank()
         , axis.ticks.y = element_blank()
         , axis.title.y = element_blank()
         , axis.text.x = element_text(size = 14)
         , axis.title.x = element_text(size = 14))
       + labs(x=NULL, y = paste("Equivalence Range (in standard deviations \u03C3)"), font=5) + theme_bw()
  )
  if(length(unique(equiv_tests$tol)) == 1) {
    g = g + ggtitle(paste0("Equivalence Tests  \n \n", "Equivalence Range: +/- ", round(unique(equiv_tests$tol), 2), "\u03C3 \n"))
  } else {
    g = g + ggtitle("Equivalence Tests  \n \n \n")
  }
  
  g_inv = ggplot(equiv_tests, aes(x = names, y = const, label = round(inverted.scaled, var.rounding)), environment = .e)
  g_inv = g_inv + geom_text()
  g_inv = (g_inv + theme_bw() + coord_flip()
           + theme(panel.grid.minor=element_blank()
                   , panel.grid.major=element_blank()
                   #, axis.line.x = element_blank()
                   , axis.text.x = element_text(color = "white", size = 14)
                   , axis.ticks.x = element_line(color = "white")
                   , axis.text.y = element_blank()
                   , axis.ticks.y = element_blank()
                   , axis.title.x = element_text(size = 14)
           )
           + ylim(1-.05, 1.05)
           + ggtitle("Equivalence\nConfidence\nInterval (+/-)\n(Scale of Var)")
           + labs(y = " ", x = NULL)
  )
  
  g_obs = ggplot(equiv_tests, aes(x = names, y = const, label = round(observed.diff, var.rounding)), environment = .e)
  g_obs = g_obs + geom_text()
  g_obs = (g_obs + theme_bw() + coord_flip()
           + theme(panel.grid.minor=element_blank()
                   , panel.grid.major=element_blank()
                   #, axis.line.x = element_blank()
                   , axis.text.x = element_text(color = "white", size = 14)
                   , axis.ticks.x = element_line(color = "white")
                   , axis.text.y = element_blank()
                   , axis.ticks.y = element_blank()
                   , axis.title.x = element_text(size = 14)
           )
           + ylim(1-.05, 1.05)
           + ggtitle("Observed\nMean\nDifference\n(Scale of Var)")
           + labs(y = " ", x = NULL)
  )
  
  g_pval = ggplot(equiv_tests, aes(x = names, y = const, label = round(p.vals, pval.rounding)), environment = .e)
  g_pval = g_pval + geom_text()
  g_pval = (g_pval + theme_bw() + coord_flip()
            + theme(panel.grid.minor=element_blank()
                    , panel.grid.major=element_blank()
                    #, axis.line.x = element_blank()
                    , axis.text.x = element_text(color = "white", size = 14)
                    , axis.ticks.x = element_line(color = "white")
                    , axis.text.y = element_blank()
                    , axis.ticks.y = element_blank()
                    , axis.title.x = element_text(size = 14)
            )
            + ylim(1-.05, 1.05)
            + ggtitle(ifelse(fdr_correct, "\nFDR\nCorrected\nP-value", "\n\n\nP-value"))
            + labs(y = " ", x = NULL)
  )
  
  g_var = ggplot(equiv_tests, aes(x = names, y = const, label = names))
  g_var = g_var + geom_text()
  g_var = (g_var + theme_bw() + coord_flip()
           + theme(panel.grid.minor=element_blank()
                   , panel.grid.major=element_blank()
                   #, axis.line.x = element_blank()
                   , axis.text.x = element_text(color = "white", size = 14)
                   , axis.ticks.x = element_line(color = "white")
                   , axis.text.y = element_blank()
                   , axis.ticks.y = element_blank()
                   , panel.border = element_blank()
                   , axis.title.x = element_text(size = 14)
           )
           + ylim(1-.05, 1.05)
           + ggtitle("\n \n \nVariable") + theme(plot.title = element_text(hjust = 0.5))
           + labs(y = " ", x = NULL)
  )
  
  grid.arrange(g_var, g_obs, g, g_inv, g_pval, ncol=5, nrow=1, widths= panel.widths, heights=c(4))
}


### Equiv. test covariates --------------------------------------------------------------



cov_list <- c(
  
  # Gemeindeverzeichnis / socio-econ variables
  "tax_income_pc_1938",
  "population_1939",
  "num_land_betriebe_1939",
  "occ_pop_industry_share_1939",
  "occ_pop_public_servants_1939",
  "migration_1933_1939",
  
  # repression infrastructure & WW2 violence
  "conc_camp_sub_count",
  "distance_nearest_subcamp",
  "distance_nearest_maincamp",
  "ww2_bombs_tons",
  "deportations",
  
  
  # infrastructure
  "elevation",
  "ruggedness",
  "train_main",
  "train_minor",
  "forest_cover",
  
  # political variables
  "protestant_share_1939",
  "nsdap_share_aw",
  "socialist_share_aw"
  
  
)



### Calculate stand. effect size ------------------------------------------------

# To produce the standardized effect size, I need to a) generate the regular effect size
# for the "high victims" dummy and b) the within-FE standard deviation 


# a) compute regular effect size
main_results_stand <- feols(
  c(right_wing_share) ~
    
    treat_march_victims + .[cov_list]
  
  | kreis_code + year  + march_fixed_effect , 
  cluster = c("code_new", "kreis_code"),
  data = analysis_sample)

main_result_coef <- main_results_stand$coefficients[2]


# b) compute within FE standard deviation of right wing share; see also https://doi.org/10.1017/psrm.2017.44
resid_y  <- feols(right_wing_share ~ 1
                  
                  | kreis_code + year  + march_fixed_effect , 
                  cluster = c("code_new", "kreis_code"),
                  data = analysis_sample %>% 
                    filter(treat_march_victims != "High victims")) # only variation in control group for Glass's Delta; see Hartman & Hidalgo

# variation of within-FH dep var used to estimate results
sd_resid_y <- sd(resid_y$residuals)

# epsilon is the standardized effect size (= coefficient / sd of within-FE dep var), as per Hartman & Hidalgo
epsilon <- abs(main_result_coef / sd_resid_y)

### Prepare equiv. test sample ----------------------------------------------------------


analysis_sample_equiv <- analysis_sample %>% 
  # cross section since covariates are time invariant
  filter(year == 1946) %>%
  ungroup() %>% 
  mutate(treated_equiv = ifelse(treat_march_victims == "High victims", 1, 0)) %>% 
  select(kreis_name, all_of(cov_list), treated_equiv, march_fixed_effect) %>% 
  group_by(march_fixed_effect, kreis_name) %>%
  mutate(across(.cols = -treated_equiv, function(x) x - mean(x, na.rm = T))) %>%
  ungroup() %>% 
  filter(complete.cases(.)) %>%
  as.data.frame()



### Run equivalence test ----------------------------------------------------


replicate_strict = as.data.frame(run_equiv(X = subset(analysis_sample_equiv, select = cov_list), 
                                           Tr = analysis_sample_equiv$treated_equiv, 
                                           epsilon.method = "custom", 
                                           custom.epsilon =  epsilon,
                                           fdr_correct = TRUE))

# cosmetics
cov_labels <-  c(
  
  # Gemeindeverzeichnis / socio-econ variables
  "tax_income_pc_1938" = "Tax income (1938)",
  "population_1939" = "Population size (1939)",
  "num_land_betriebe_1939" = "Number of agrarian\nbusinesses (1939)",
  "occ_pop_industry_share_1939" = "Population share working\nin industry (1939)", 
  "occ_pop_public_servants_1939" = "Population share working\nin the public service (1939)", 
  "migration_1933_1939" = "Net migration 1933-1939", 
  # "student_share_1939"
  
  # repression infrastructure & WW2 violence
  "conc_camp_sub_count" = "Number of concentration\n subcamps in municipality",
  "distance_nearest_subcamp" = "Distance to nearest subcamp", 
  "distance_nearest_maincamp" = "Distance to Dachau/Flossenbuerg",
  "ww2_bombs_tons" = "Allied bombs (tons)",
  "deportations" = "Deportations (Jews)",
  
  
  # infrastructure
  "elevation" = "Elevation",
  "ruggedness" = "Terrain ruggedness",
  "train_main" = "Main train line crossing",
  "train_minor" = "Minor train line crossing", 
  "forest_cover" = "Percent covered in forest (1939", 
  
  # political variables
  "protestant_share_1939" = "Share of protestants (1939", 
  "nsdap_share_aw" = "NSDAP vote share in 1933",
  "socialist_share_aw" = "Socialist block vote share in 1933"
  
  
)

row.names(replicate_strict) <- cov_labels[names(cov_labels) == row.names(replicate_strict)]

replicate_strict <- replicate_strict %>%
  arrange(p.vals)




### Output equiv. test plot --------------------------------------------------------------

ggsave(plot = generate_plot(replicate_strict, 
                            panel.widths = c(2.5, 1.25, 5, 1.2, 1.2), 
                            pval.rounding = 3, fdr_correct = TRUE), 
       device = cairo_pdf,
       width = 12, height = 7, filename = "./figures/figure_SI-4.4.pdf")

## Figure SI-6.1: Reconciliation efforts -------------------------------------------


newspapers <- readxl::read_excel(here("./data/newspaper word counts.xlsx"))


newspapers <- newspapers %>% 
  pivot_longer(cols = -c(year, das, der, die)) %>% 
  arrange(name, year) %>% 
  mutate(value_share = value / (das + die + das))

newspapers <- newspapers %>% 
  mutate(category = case_when(name == "kz" ~ "Holocaust", 
                              name == "juden" ~ "Holocaust", 
                              name == "entnazifizierung" ~ "Accountability",
                              name == "nürnberger" ~ "Accountability", 
                              name == "erinnerung" ~ "Memory", 
                              name == "gedenken" ~ "Memory", 
                              name == "schuld" ~ "Guilt", 
                              name == "täter" ~ "Guilt", 
                              TRUE ~ NA_character_)) %>% 
  mutate(name_clean = case_when(name == "kz" ~ " Concentration camp (\"kz\")", 
                                name == "juden" ~ " Jews (\"juden\")", 
                                name == "entnazifizierung" ~ " Denazification (\"entnazifizierung\")",
                                name == "nürnberger" ~ " Nuremberg trials (\"nürnberger\")", 
                                name == "erinnerung" ~ " Remembrance (\"erinnerung\")", 
                                name == "gedenken" ~ " Commemoration (\"gedenken\")", 
                                name == "schuld" ~ " Guilt (\"schuld\")", 
                                name == "täter" ~ " Perpetrator (\"täter\")", 
                                TRUE ~ NA_character_)) %>% 
  group_by(name) %>% 
  mutate(value_share = as.numeric(scale(value_share))) %>% 
  group_by(category) %>% 
  mutate(linetype = ifelse(!is.na(category), 
                           rep(c("dashed", "solid"), each = n() / 2), 
                           NA)) %>% 
  group_by(category, linetype) %>% 
  mutate(
    label_position_x = ifelse(linetype == "dashed", 
                              year[year == max(year)], 
                              year[year == min(year)]),
    label_position_y = ifelse(linetype == "dashed", 
                              value_share[year == max(year)], 
                              value_share[year == max(year)]))

np_labels <- newspapers %>% 
  group_by(category, linetype) %>% 
  summarise(name_clean = unique(name_clean), 
            x = unique(label_position_x), 
            y = unique(label_position_y)) %>% 
  ungroup() %>% 
  filter(!is.na(category))

np_plot_account <- newspapers %>% 
  filter(category == "Accountability") %>% 
  ggplot(., aes(x = year, y = value_share, color = name_clean,
                group = name_clean, linetype = name_clean)) + 
  geom_line() + 
  facet_wrap(~category, scales = "free") + 
  scale_color_brewer("Keywords:", 
                     palette = "Set1") +
  scale_linetype("Keywords:") + 
  
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "bottom") +
  labs(x = "", y = "")


np_plot_holo <- newspapers %>% 
  filter(category == "Holocaust") %>% 
  ggplot(., aes(x = year, y = value_share, color = name_clean,
                group = name_clean, linetype = name_clean)) + 
  geom_line() + 
  facet_wrap(~category, scales = "free") + 
  scale_color_brewer("Keywords:", 
                     palette = "Set1") +
  scale_linetype("Keywords:") + 
  
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "bottom") +
  labs(x = "", y = "Normalized share of term mentions", title = "")

np_plot_guilt <- newspapers %>% 
  filter(category == "Guilt") %>% 
  ggplot(., aes(x = year, y = value_share, color = name_clean,
                group = name_clean, linetype = name_clean)) + 
  geom_line() + 
  facet_wrap(~category, scales = "free") + 
  scale_color_brewer("Keywords:", 
                     palette = "Set1") +
  scale_linetype("Keywords:") + 
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "bottom") +
  labs(x = "", y = "")

np_plot_memory <- newspapers %>% 
  filter(category == "Memory") %>% 
  ggplot(., aes(x = year, y = value_share, color = name_clean,
                group = name_clean, linetype = name_clean)) + 
  geom_line() + 
  facet_wrap(~category, scales = "free") + 
  scale_color_brewer("Keywords:", 
                     palette = "Set1") +
  scale_linetype("Keywords:") + 
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        legend.position = "bottom") +
  labs(x = "", y = "Normalized share of term mentions", title = "")


np_plot <- (np_plot_holo + np_plot_account) / 
  (np_plot_memory + np_plot_guilt) & theme(legend.direction = "vertical")


ggsave(np_plot, filename = here("./figures/figure_SI-6.1.pdf"),
       width = 9, height = 9, scale = 0.8)






## Table SI-4.1: Expellee share --------------------------------------------------------



expel_mod <- feols(right_wing_share ~
                     
                     
                     sw(treat_march_victims +
                          expellee_share_aw +
                          .[covariates], 
                        treat_march_victims_pop +
                          expellee_share_aw +
                          .[covariates], 
                        
                        treat_viol_base +
                          expellee_share_aw +
                          .[covariates]
                        
                     )
                   
                   
                   | kreis_code + year + march_fixed_effect, 
                   cluster = c("code_new", "kreis_code"),
                   data = analysis_sample %>% 
                     ungroup() %>% 
                     mutate(treat_viol_base = case_when(treat_march_victims == "High victims" ~ "1", 
                                                        treat_march_victims == "Low victims" ~ "0", 
                                                        TRUE ~ NA_character_)) %>% 
                     mutate(treat_viol_base = as.numeric(treat_viol_base))
) 


rows <- tribble( ~term,       ~model1, ~model2, ~model3, 
                 "Kreis FE",  "Yes", "Yes", "Yes",
                 "Year FE",   "Yes", "Yes", "Yes",
                 "Covariates",   "Yes",   "Yes", "Yes",
                 "March FE",      "Yes","Yes","Yes"
)

attr(rows, 'position') <- c(11:16)

names(expel_mod) <- 1:3

modelsummary(as.list(expel_mod), 
             coef_map = coef_map,
             gof_map = my_gof_map,
             add_rows = rows,
             stars = T) %>% 
  
  kableExtra::add_header_above(c(" " = 1, "DV: Nationalist Parties Vote Share" = 3) ) %>% 
  kableExtra::save_kable(file = "./tables/table_SI-4.1.html")




## Table SI-5.1: March transits ----

march_transit_mod <- feols(c(right_wing_share) ~ 
                             sw(
                               
                               treat_march_transit +
                                 .[covariates]
                               
                             ) 
                           | kreis_code + year, 
                           cluster = c("code_new", "kreis_code"),
                           data = analysis_sample 
                           # filter(sum_bodies_am <= 0) 
)


foot_transit_mod <- feols(c(right_wing_share) ~ 
                            sw(
                              
                              
                              
                              foot_transit_only +
                                .[covariates]
                              
                            ) 
                          | kreis_code + year, 
                          cluster = c("code_new", "kreis_code"),
                          data = analysis_sample %>% 
                            # filter(sum_bodies_am <= 0) %>% 
                            filter(train_transit_only != 1)%>% 
                            filter(mixed_transit_only != 1)
)



train_transit_mod <- feols(c(right_wing_share) ~ 
                             sw(
                               
                               
                               
                               train_transit_only +
                                 .[covariates]
                               
                             ) 
                           | kreis_code + year, 
                           cluster = c("code_new", "kreis_code"),
                           data = analysis_sample %>% 
                             # filter(sum_bodies_am <= 0) %>% 
                             
                             filter(foot_transit_only!= 1) %>% 
                             filter(mixed_transit_only != 1)
)



march_size_mod <- feols(c(right_wing_share) ~ 
                          
                          
                          
                          
                          treat_march_size_pop + .[covariates]
                        
                        
                        | kreis_code + year, 
                        cluster = c("code_new", "kreis_code"),
                        data = analysis_sample 
)



models <- list(
  march_transit_mod,
  train_transit_mod, 
  foot_transit_mod, 
  
  march_size_mod
)


rows <- tribble( ~term,       ~model1, ~model2, ~model3, ~model4, 
                 "Kreis FE",  "Yes", "Yes", "Yes", "Yes", 
                 "Year FE",   "Yes", "Yes", "Yes", "Yes", 
                 "Covariates",   "Yes",   "Yes", "Yes",   "Yes"
                 
)

attr(rows, 'position') <- c(9:12)

names(march_transit_mod) <- 1:2

my_gof_map <- modelsummary::gof_map %>%
  filter(grepl("adj\\.r|nobs", raw))



coef_map <- c(
  
  "treat_march_transit" = "March transit", 
  "train_transit_only" = "March transit (train only)", 
  "foot_transit_only" = "March transit (foot only) ", 
  "treat_march_size_pop" = "March size/Population 1939"
)



modelsummary(models, 
             coef_map = coef_map,
             gof_map = my_gof_map,
             add_rows = rows,
             stars = T) %>% 
  kableExtra::add_header_above(c(" " = 1, "March transit" = 3, "March size" = 1) ) %>% 
  
  kableExtra::add_header_above(c(" " = 1, "DV: Nationalist Parties Vote Share" = 4) ) %>% 
  kableExtra::save_kable(file = "./tables/table_SI-5.1.html")








## Figure SI-7.1: Survey map--------------------------------------------------------------


bavaria_shp_simple <- bavaria_shp %>% rmapshaper::ms_simplify(keep = 0.05)

peasant_survey <- haven::read_dta("./data/peasant_survey.dta")

peasant_survey <- peasant_survey %>% 
  st_as_sf(coords = c("lon", "lat"), remove = FALSE) %>% 
  st_set_crs(4326) %>% 
  st_transform(st_crs(bavaria_shp))

peasant_survey <- peasant_survey %>% 
  mutate(sample_status = case_when(transit_victim_cat == 1 ~ "No march; excluded from sample", 
                                   transit_victim_cat == 2 ~ "March transit; no violence", 
                                   transit_victim_cat == 3 ~ "March transit; violence")) %>% 
  filter(!is.na(sample_status))

peasant_survey <- peasant_survey %>% 
  group_by(lon, lat, sample_status) %>% 
  summarise(n = n())

peasant_survey <- peasant_survey %>% 
  ungroup() %>% 
  mutate(alpha = ifelse(grepl("No march", sample_status), 0.45, 1))


survey_map <- ggplot()  + 
  geom_sf(data = bavaria_shp_simple, size = 0.05, fill = "white", color = "gray60")  +
  geom_sf(data = bavaria_kreis_shp, fill = NA, size = .5, color = "gray40") +
  geom_sf(data = peasant_survey,  shape = 21, size = 3,
          aes(fill = sample_status, alpha = alpha), color = "black") +
  scale_fill_manual("", values = c( "#fd8d3c",  "#bd0026", "gray55")) + 
  scale_alpha_identity() +
  theme_void()

ggsave(survey_map, filename = here("./figures/figure_SI-7.1.pdf"), 
       width = 12, height = 12, scale = 0.8)

